# Package setup for the PCA of the mineral-water data.
# NOTE: the workspace is deliberately not cleared here. rm(list = ls()) only
# removes global objects (attached packages and options survive) and silently
# destroys the caller's data when the script is sourced; start from a fresh
# R session instead.
library("knitr") # table formatting for printed output
library("ggplot2") # nicer-looking graphics
library("FactoMineR") # principal component analysis (PCA)
library("factoextra") # extract and visualise FactoMineR results
library("corrplot") # graphical display of correlation matrices
library("plotly")

Présentation des données

# Read the mineral-water data set from its remote CSV file (decimal comma);
# the first column of the file supplies the row names.
EauxMinerales <- read.csv2(
  file = "https://raw.githubusercontent.com/GasparMassiot/GMN3A/master/data/EauxMinerales.csv",
  row.names = 1,
  dec = ","
)
# Preview the first rows as a formatted table.
knitr::kable(head(EauxMinerales))
Ca Mg K Na Si SO4 Cl NO3 Gaz
Alet 63.0 23.00 1.8 13.0 300.0 14.00 11.0 2.0 NG
Arcens 88.0 66.00 7.0 290.0 1280.0 3.77 52.0 0.0 G
Ariegeoise 3.7 0.58 0.5 1.8 6.7 8.00 0.8 1.0 NG
Arvie 183.0 93.00 144.0 616.0 2306.0 50.00 305.0 0.0 G
Auvergne 4.0 1.80 0.6 3.6 24.4 1.50 0.9 0.5 NG
Beaumont 10.8 4.20 1.3 20.5 82.8 8.10 5.2 0.8 NG
# Size of the data set: number of rows, then number of columns.
c(nrow(EauxMinerales), ncol(EauxMinerales))
## [1] 35  9
# Basic summary statistics for every column
summary(object = EauxMinerales)
##        Ca              Mg               K                Na               Si              SO4               Cl              NO3             Gaz           
##  Min.   :  1.2   Min.   :  0.20   Min.   :  0.40   Min.   :   1.4   Min.   :   1.0   Min.   :   1.5   Min.   :  0.80   Min.   : 0.000   Length:35         
##  1st Qu.: 55.5   1st Qu.:  5.80   1st Qu.:  1.00   1st Qu.:   7.0   1st Qu.: 178.1   1st Qu.:  12.0   1st Qu.:  5.65   1st Qu.: 0.125   Class :character  
##  Median : 78.0   Median : 12.00   Median :  3.00   Median :  13.5   Median : 250.0   Median :  25.0   Median : 18.45   Median : 1.500   Mode  :character  
##  Mean   :122.0   Mean   : 28.06   Mean   : 17.74   Mean   : 184.1   Mean   : 624.6   Mean   : 198.3   Mean   : 48.50   Mean   : 5.747                     
##  3rd Qu.:160.5   3rd Qu.: 42.00   3rd Qu.:  6.35   3rd Qu.: 152.6   3rd Qu.: 401.5   3rd Qu.: 146.8   3rd Qu.: 39.50   3rd Qu.: 3.150                     
##  Max.   :555.0   Max.   :110.00   Max.   :144.00   Max.   :1744.0   Max.   :4263.0   Max.   :1479.0   Max.   :329.00   Max.   :82.000                     
##                                                                                                       NA's   :1        NA's   :1
# Correlation matrix of the quantitative variables.
# Numeric columns are selected by type rather than by position, and
# observations with missing values are dropped through use = "complete.obs"
# instead of a hard-coded row index, so the computation keeps working if
# rows or columns of the data set are added, removed or reordered.
correlation <- cor(Filter(is.numeric, EauxMinerales), use = "complete.obs")
# Print the matrix as a table rounded to two decimals.
kable(correlation, digits = 2)
Ca Mg K Na Si SO4 Cl NO3
Ca 1.00 0.75 0.04 -0.08 0.08 0.83 -0.05 -0.07
Mg 0.75 1.00 0.37 0.11 0.17 0.68 0.10 0.19
K 0.04 0.37 1.00 0.83 0.78 -0.01 0.76 0.24
Na -0.08 0.11 0.83 1.00 0.87 0.00 0.77 0.22
Si 0.08 0.17 0.78 0.87 1.00 -0.09 0.75 -0.20
SO4 0.83 0.68 -0.01 0.00 -0.09 1.00 -0.10 0.24
Cl -0.05 0.10 0.76 0.77 0.75 -0.10 1.00 -0.07
NO3 -0.07 0.19 0.24 0.22 -0.20 0.24 -0.07 1.00
# Graphical display of the correlation matrix
corrplot(corr = correlation)

Analyse en composantes principales

# Standardised PCA on the observations without missing numeric values.
# Incomplete rows are detected with complete.cases() on the numeric columns
# (instead of a hard-coded row index), and the qualitative column "Gaz" is
# located by name (instead of by position).
resPCA <- PCA(
  EauxMinerales[complete.cases(Filter(is.numeric, EauxMinerales)), ],
  scale.unit = TRUE,
  quali.sup = which(names(EauxMinerales) == "Gaz"),
  graph = FALSE
)
# Supplementary variables are shown on the plots but are not taken into
# account when the PCA itself is computed.

Analyse de l’inertie des axes factoriels

# Eigenvalues with the percentage and cumulative percentage of variance
get_eigenvalue(X = resPCA)
##        eigenvalue variance.percent cumulative.variance.percent
## Dim.1 3.456262127      43.20327659                    43.20328
## Dim.2 2.525381046      31.56726307                    74.77054
## Dim.3 1.177107787      14.71384734                    89.48439
## Dim.4 0.385956448       4.82445559                    94.30884
## Dim.5 0.270901788       3.38627235                    97.69511
## Dim.6 0.103206116       1.29007645                    98.98519
## Dim.7 0.076987286       0.96234108                    99.94753
## Dim.8 0.004197402       0.05246753                   100.00000
# Scree plot with value labels on the bars and a red horizontal reference
# line at y = 10. TRUE is spelled out because T is an ordinary variable
# that can be reassigned.
fviz_eig(resPCA, addlabels = TRUE) +
  geom_hline(yintercept = 10, color = "red")

Étude des variables

# Extract the PCA results for the variables.
var <- get_pca_var(resPCA)

# Variable plot on axes 1-2, coloured by cos2, made interactive with plotly.
ggplotly(
  fviz_pca_var(
    resPCA,
    axes = 1:2,
    col.var = "cos2",
    geom = c("text", "arrow")
  ) +
    theme_classic()
)

# Same plot on axes 3-4 (static ggplot).
fviz_pca_var(
  resPCA,
  axes = 3:4,
  col.var = "cos2",
  geom = c("text", "arrow")
) +
  theme_classic()

Étude des individus

# Extract the PCA results for the individuals.
ind <- get_pca_ind(resPCA)

# Summed cos2 of each individual over axes 1 and 2.
# NOTE: the plot below is coloured through the string "cos2", not through
# this variable.
cos2 <- rowSums(resPCA[["ind"]][["cos2"]][, 1:2])
fviz_pca_ind(
  resPCA,
  axes = 1:2,
  col.ind = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # avoid overlapping text labels
)

# Same computation and plot for axes 3 and 4.
cos2 <- rowSums(resPCA[["ind"]][["cos2"]][, 3:4])
fviz_pca_ind(
  resPCA,
  axes = 3:4,
  col.ind = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

Biplot

# Biplot showing individuals and variables together.
fviz_pca_biplot(
  resPCA,
  col.ind = "#696969", # colour of the individuals
  col.var = "#2E9FDF", # colour of the variables
  repel = TRUE
)